# Table 8: Market Response to Block Entries Depending on the Type of Block and Firm (Simulated Data)

# Import the simulated sample data. The data used in the paper are built from
# proprietary sources (e.g., CRSP, Compustat), so simulated data are used here
# only to illustrate the layout of the file.

# pandas provides read_csv; importing it here keeps this section runnable on
# its own (a repeated import is harmless if pandas was already loaded earlier).
import pandas as pd

# The simulated sample is stored as a pipe-delimited text file.
df = pd.read_csv('data/dgtw_simulated_data.txt', sep='|')

# unconditional mean dgtw returns by different investor categories:
import statsmodels.formula.api as smf

# One regression per investor classification, in this order:
#   type  - nonfinancial vs financial
#   type4 - 4 types
#   lt_st - short-term vs long-term
# (since data are simulated, estimates do not match paper)
for _group_col in ('type', 'type4', 'lt_st'):
    # "-1" drops the intercept, so every category gets its own coefficient,
    # i.e. the mean dgtw return of that category.
    _formula = f"dgtw ~ C({_group_col})-1 "
    print(smf.ols(formula=_formula, data=df).fit().summary())

# Table 8 Panel B (simulated data)

#low vs high volatility:

def terciles(types, value, measure, *, levels=('low', 'high'), data=None):
    """Print mean dgtw returns, and their difference, for two groups of ``measure``.

    The sample is restricted to rows where column ``types`` equals ``value``
    and column ``measure`` holds one of the labels in ``levels``; rows with any
    other label are dropped (presumably the middle tercile -- confirm against
    the data).  Two OLS summaries are then printed:

    * ``dgtw ~ C(measure) - 1``: no intercept, so each coefficient is the mean
      return of one group;
    * ``dgtw ~ C(measure)``: with an intercept, so the remaining coefficient is
      the difference in mean returns relative to the reference group
      (NOTE(review): the reference is chosen by patsy's default treatment
      coding -- presumably the alphabetically first label; confirm).

    Args:
        types: Name of the column holding the investor classification.
        value: Category of ``types`` to keep.
        measure: Name of the column holding the group labels.
        levels: Labels of ``measure`` to compare.  Defaults to the volatility
            labels ``('low', 'high')`` this function originally hard-coded.
        data: DataFrame to analyse.  Defaults to the module-level ``df``.
    """
    sample = df if data is None else data
    # Build the subsample once; both regressions use exactly the same rows.
    keep = (sample[types] == value) & sample[measure].isin(levels)
    subset = sample[keep]

    rule = '-' * 60
    print(rule)
    print(f'{value} by {measure}:')
    print(rule)
    print('mean returns:')
    print(rule)
    print(smf.ols(formula=f"dgtw ~ C({measure}) -1", data=subset).fit().summary())
    print(rule)
    print('difference in mean returns:')
    print(rule)
    print(smf.ols(formula=f"dgtw ~ C({measure}) ", data=subset).fit().summary())

# (classification column, investor category) pairs, in reporting order:
# two types, then four types, then short-term vs long-term.
_vol_panel_groups = (
    ('type', 'nonfinancial'),
    ('type', 'financial'),
    ('type4', 'individual'),
    ('type4', 'other private'),
    ('type4', 'hedge funds'),
    ('type4', 'institutional'),
    ('lt_st', 'short term'),
    ('lt_st', 'long term'),
)
for _group_col, _category in _vol_panel_groups:
    terciles(types=_group_col, value=_category, measure='vol')



# Table 8 Panel C (simulated data)

#small vs large size:

def terciles(types, value, measure, *, levels=('small', 'large'), data=None):
    """Print mean dgtw returns, and their difference, for two groups of ``measure``.

    The sample is restricted to rows where column ``types`` equals ``value``
    and column ``measure`` holds one of the labels in ``levels``; rows with any
    other label are dropped (presumably the middle tercile -- confirm against
    the data).  Two OLS summaries are then printed:

    * ``dgtw ~ C(measure) - 1``: no intercept, so each coefficient is the mean
      return of one group;
    * ``dgtw ~ C(measure)``: with an intercept, so the remaining coefficient is
      the difference in mean returns relative to the reference group
      (NOTE(review): the reference is chosen by patsy's default treatment
      coding -- presumably the alphabetically first label; confirm).

    Args:
        types: Name of the column holding the investor classification.
        value: Category of ``types`` to keep.
        measure: Name of the column holding the group labels.
        levels: Labels of ``measure`` to compare.  Defaults to the size labels
            ``('small', 'large')`` this function originally hard-coded.
        data: DataFrame to analyse.  Defaults to the module-level ``df``.
    """
    sample = df if data is None else data
    # Build the subsample once; both regressions use exactly the same rows.
    keep = (sample[types] == value) & sample[measure].isin(levels)
    subset = sample[keep]

    rule = '-' * 60
    print(rule)
    print(f'{value} by {measure}:')
    print(rule)
    print('mean returns:')
    print(rule)
    print(smf.ols(formula=f"dgtw ~ C({measure}) -1", data=subset).fit().summary())
    print(rule)
    print('difference in mean returns:')
    print(rule)
    print(smf.ols(formula=f"dgtw ~ C({measure}) ", data=subset).fit().summary())

# (classification column, investor category) pairs, in reporting order:
# two types, then four types, then short-term vs long-term.
_size_panel_groups = (
    ('type', 'nonfinancial'),
    ('type', 'financial'),
    ('type4', 'individual'),
    ('type4', 'other private'),
    ('type4', 'hedge funds'),
    ('type4', 'institutional'),
    ('lt_st', 'short term'),
    ('lt_st', 'long term'),
)
for _group_col, _category in _size_panel_groups:
    terciles(types=_group_col, value=_category, measure='size')


